# Launch a supervised fine-tuning (SFT) run with LoRA via src/train.py.
#
# The path to train.py is relative, so run this script from the directory
# that contains src/ (NOTE(review): presumably a LLaMA-Factory checkout --
# confirm against the repository layout).
#
# The flag list is assembled in small, commented groups using the function's
# own positional parameters, which keeps the script POSIX-sh compatible (no
# arrays) and leaves the caller's "$@" untouched. Flag order is significant
# only in that it mirrors the order the command has always been issued in.
launch_sft() {
  # Task, base model, dataset, prompt template and tuning method.
  set -- \
    --stage sft \
    --do_train \
    --model_name_or_path qwen/Qwen2-1.5B \
    --dataset english_teacher_sft_data \
    --template qwen \
    --finetuning_type lora

  # Output location and cache handling.
  set -- "$@" \
    --output_dir /root/english_teacher/model \
    --overwrite_cache

  # Batch sizing.
  set -- "$@" \
    --per_device_train_batch_size 4 \
    --gradient_accumulation_steps 4

  # LR schedule, logging cadence, checkpoint cadence and run length.
  set -- "$@" \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_strategy epoch \
    --learning_rate 5e-5 \
    --num_train_epochs 50.0

  # Loss plot and bf16 switch.
  set -- "$@" \
    --plot_loss \
    --bf16

  # CUDA_VISIBLE_DEVICES is set for this one command only (not exported),
  # restricting the process to GPU index 0.
  CUDA_VISIBLE_DEVICES=0 python src/train.py "$@"
}

# The function's status is python's status, so it is also the script's.
launch_sft
